# =============================================================================
# MAIN ANALYSES: TABLES 2 AND 3
# Primary replication analyses for Kim & Sudduth and Boix & Svolik
# =============================================================================

# --- PACKAGES ----------------------------------------------------------------
library(haven)
library(dplyr)
library(flexsurv)
library(survival)
library(texreg)
library(stargazer)
library(readr)
library(sandwich)
library(lmtest)

# --- DATA LOADING ------------------------------------------------------------
est     <- read_csv("estimates_independent.csv")
ks_raw  <- haven::read_dta("kim_sudduth.dta") %>% dplyr::mutate(COWcode = cowcode)
bs_data <- haven::read_dta("leader_tvc_2.dta", encoding = "latin1") %>% 
  dplyr::mutate(COWcode = ccode)

# --- HELPER FUNCTIONS --------------------------------------------------------
# Function for clustering standard errors
cluster_se_vector <- function(model, cluster_vec) {
  sqrt(diag(sandwich::vcovHC(model, type = "HC1", cluster = cluster_vec)))
}

# =============================================================================
# TABLE 2: REPLICATION OF KIM AND SUDDUTH
# =============================================================================

# --- DATA PREPARATION --------------------------------------------------------
# Merge and create lags
ks_merged <- dplyr::inner_join(est, ks_raw, by = c("COWcode", "year")) %>%
  dplyr::arrange(COWcode, year) %>%
  dplyr::group_by(COWcode) %>%
  dplyr::mutate(
    dyn.estimates_lag = dplyr::lag(dyn.estimates, 1),
    dyn.up_lag        = dplyr::lag(dyn.up, 1),
    dyn.lo_lag        = dplyr::lag(dyn.lo, 1)
  ) %>%
  dplyr::ungroup()

# Exact sample for models (drop rows with missing lag or covariates)
ks_exact <- ks_merged %>%
  dplyr::filter(!is.na(dyn.estimates_lag))

# --- ANALYSIS ----------------------------------------------------------------
# Original (restricted) models using legislature dummy on identical sample
m2restricted <- glm(
  anycoupatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW +
    lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3),
  data = ks_exact, family = "binomial"
)

m5restricted <- glm(
  reshuffleatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW +
    lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3),
  data = ks_exact, family = "binomial"
)

m8restricted <- glm(
  rechangeatt_fin ~ gwf_legislature + gwf_leadermil + ll_rgdpe_PW +
    lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3),
  data = ks_exact, family = "binomial"
)

# New models with latent measure (lagged)
m2new <- glm(
  anycoupatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW +
    lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(anyattyrs, 3),
  data = ks_merged, family = "binomial"
)


m5new <- glm(
  reshuffleatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW +
    lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(shuffattyrs, 3),
  data = ks_merged, family = "binomial"
)


m8new <- glm(
  rechangeatt_fin ~ dyn.estimates_lag + gwf_leadermil + ll_rgdpe_PW +
    lag_growth_rgdpe_PW + ll_pop_PW + postcold + poly(rechattyrs, 3),
  data = ks_merged, family = "binomial"
)

# Calculate clustered standard errors
cl_ks_exact  <- ks_exact$COWcode
cl_ks_merged <- ks_merged$COWcode

se_m2r <- cluster_se_vector(m2restricted, cl_ks_exact)
se_m5r <- cluster_se_vector(m5restricted, cl_ks_exact)
se_m8r <- cluster_se_vector(m8restricted, cl_ks_exact)

se_m2n <- cluster_se_vector(m2new, cl_ks_merged)
se_m5n <- cluster_se_vector(m5new, cl_ks_merged)
se_m8n <- cluster_se_vector(m8new, cl_ks_merged)

# --- OUTPUT ------------------------------------------------------------------
stargazer::stargazer(
  m2restricted, m2new, m5restricted, m5new, m8restricted, m8new,
  type = "latex",
  se = list(se_m2r, se_m2n, se_m5r, se_m5n, se_m8r, se_m8n),
  column.labels = c("Restricted", "Replication", "Restricted", "Replication", 
                    "Restricted", "Replication"),
  star.cutoffs = c(0.1, 0.05, 0.01),
  notes.append = FALSE
)

# =============================================================================
# TABLE 3: REPLICATION OF BOIX AND SVOLIK
# =============================================================================

# --- DATA PREPARATION --------------------------------------------------------
# One row per COWcode-year from latent estimates
latent_unique <- est %>%
  dplyr::distinct(COWcode, year, .keep_all = TRUE)

# Merge, order, and build lags / time0
df_master <- bs_data %>%
  dplyr::inner_join(latent_unique, by = c("COWcode", "year")) %>%
  dplyr::arrange(COWcode, year, leadid) %>%
  dplyr::group_by(COWcode) %>%
  dplyr::mutate(
    dyn.estimates_lag = dplyr::lag(dyn.estimates, 1),
    dyn.up_lag        = dplyr::lag(dyn.up, 1),
    dyn.lo_lag        = dplyr::lag(dyn.lo, 1)
  ) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(leadid) %>%
  dplyr::mutate(time0 = dplyr::lag(time, default = 0)) %>%
  dplyr::ungroup()

# Complete-case variables
all_covs <- c(
  "time0", "time", "c_coup", "c_natural", "c_revolt",
  "legislature", "leg_growth_2", "dyn.estimates_lag",
  "gdp_1k", "chgdpen_fearonlaitin", "Oil_fearonlaitin",
  "postcoldwarlag", "civiliandictatorshiplag", "militarydictatorshiplag",
  "communist", "lpopl1_fearonlaitin", "ethfrac_fearonlaitin",
  "relfrac_fearonlaitin", "age"
)

df_cc <- df_master %>%
  dplyr::filter(complete.cases(dplyr::select(., dplyr::all_of(all_covs))))

# --- ANALYSIS ----------------------------------------------------------------
# Weibull survival models
fit_coups_restr <- flexsurv::flexsurvreg(
  survival::Surv(time0, time, c_coup) ~ legislature + leg_growth_2 +
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin + postcoldwarlag +
    civiliandictatorshiplag + militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin + relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_coups_new <- flexsurv::flexsurvreg(
  survival::Surv(time0, time, c_coup) ~ dyn.estimates_lag +
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin + postcoldwarlag +
    civiliandictatorshiplag + militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin + relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_natural_restr <- flexsurv::flexsurvreg(
  survival::Surv(time0, time, c_natural) ~ legislature + leg_growth_2 +
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin + postcoldwarlag +
    civiliandictatorshiplag + militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin + relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_natural_new <- flexsurv::flexsurvreg(
  survival::Surv(time0, time, c_natural) ~ dyn.estimates_lag +
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin + postcoldwarlag +
    civiliandictatorshiplag + militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin + relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_revolt_restr <- flexsurv::flexsurvreg(
  survival::Surv(time0, time, c_revolt) ~ legislature + leg_growth_2 +
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin + postcoldwarlag +
    civiliandictatorshiplag + militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin + relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

fit_revolt_new <- flexsurv::flexsurvreg(
  survival::Surv(time0, time, c_revolt) ~ dyn.estimates_lag +
    gdp_1k + chgdpen_fearonlaitin + Oil_fearonlaitin + postcoldwarlag +
    civiliandictatorshiplag + militarydictatorshiplag + communist +
    lpopl1_fearonlaitin + ethfrac_fearonlaitin + relfrac_fearonlaitin + age,
  data = df_cc, dist = "weibull"
)

# --- OUTPUT ------------------------------------------------------------------
texreg::texreg(list(
  fit_natural_restr, fit_natural_new,
  fit_coups_restr,   fit_coups_new,
  fit_revolt_restr,  fit_revolt_new
))
